results_dir <- "results/final"
figures_dir <- "figures/final"
sourcedata_dir <- "source_data/final"
dir.create(results_dir, recursive = TRUE)
dir.create(figures_dir, recursive = TRUE)
dir.create(sourcedata_dir, recursive = TRUE)
library(SingleCellExperiment)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggpubr)
library(moon) # Yingxin's personal package
library(pheatmap)
library(reshape2)
library(gridExtra)
library(RColorBrewer)
library(UpSetR)
library(scattermore)
library(scater)
library(scran)
library(ggridges)
library(rcartocolor)
library(Rtsne)
library(ggalluvial)
library(ggrepel)
library(BiocParallel)
library(BiocSingular)
library(BiocNeighbors)
library(openxlsx)
library(SparseArray)
ggplot2::theme_set(theme_bw() + theme_yx() +
theme(axis.text.y = element_text(color = "black"),
axis.text.x = element_text(color = "black")) )
rdbu <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100)
reds <- colorRampPalette(c("white", brewer.pal(n = 7, name = "Reds")))(100)
library(fgsea)
runFGSEA <- function(stats, pathways, scoreType = "std") {
fgseaRes <- fgsea(pathways = pathways,
stats = sort(stats),
minSize = 5,
maxSize = 10000,
nproc = 1,
scoreType = scoreType)
fgseaRes[order(fgseaRes$ES, decreasing = TRUE),]
}
library(org.Hs.eg.db)
library(clusterProfiler)
runGO <- function(gene_set, background, maxGSSize = 500) {
eg <- bitr(gene_set,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = "org.Hs.eg.db")
geneList <- bitr(background,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = "org.Hs.eg.db")
ego_res <- enrichGO(gene = eg$ENTREZID,
universe = geneList$ENTREZID,
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = maxGSSize,
pvalueCutoff = 1,
qvalueCutoff = 1,
readable = TRUE)
return(ego_res)
}
library(GO.db)
library(biomaRt)
library(org.Hs.eg.db)
retrieved <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:0034976", columns="SYMBOL")
#GO:0034976
#response to endoplasmic reticulum stress
#GO: 0140467
retrieved <- AnnotationDbi::select(org.Hs.eg.db, keytype="GOALL", keys="GO:0140467", columns="SYMBOL")
sce_melanoma_all <- readRDS("source_data/final/sce_melanoma.rds")
colnames(sce_melanoma_all) <- sce_melanoma_all$Barcode
# sce_melanoma <- readRDS("data/sce_melanoma.rds")
# colnames(sce_melanoma) <- sce_melanoma$Barcode
patient_color <- tableau_color_pal()(10)
names(patient_color) <- names(table(sce_melanoma_all$Patient_publish))
sample_color <- tableau_color_pal("Tableau 20")(20)
names(sample_color) <- names(table(sce_melanoma_all$Sample_publish))
numbat_classify <- readRDS("results/numbat_tumour_classification.rds")
table(numbat_classify, sce_melanoma_all$scClassify_celltype)
##
## numbat_classify B Cells Endothelial Cells Fibroblasts intermediate Macrophages
## normal 9616 12 1911 971 5028
## tumor 31 2 945 830 176
##
## numbat_classify Monocyte NK Cells Plasma Cells T cells Tumor cells unassigned
## normal 1549 1084 203 28437 2171 230
## tumor 139 7 49 166 114848 194
df_toPlot <- moon::makeMoonDF(sce_melanoma_all)
g1 <- ggplot(df_toPlot,
aes(x = UMAP1, y = UMAP2, color = scClassify_celltype)) +
geom_scattermore(pointsize = 1) +
theme(aspect.ratio = 1) +
scale_color_manual(values = RColorBrewer::brewer.pal(11, "Paired")) +
labs(color = "Cell types")
g2 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = numbat_classify)) +
geom_scattermore(pointsize = 1) +
theme(aspect.ratio = 1) +
scale_color_manual(values = RColorBrewer::brewer.pal(10, "Paired")[c(9, 10)]) +
labs(color = "Numbat classification")
g3 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = paste(numbat_classify == "tumor", scClassify_celltype == "Tumor cells"))) +
geom_scattermore(pointsize = 1) +
theme(aspect.ratio = 1) +
scale_color_manual(values = RColorBrewer::brewer.pal(10, "Paired")[c(1, 2, 9, 10)]) +
labs(color = "Numbat|scClassify")
g4 <- ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = ifelse(numbat_classify == "tumor" & scClassify_celltype == "Tumor cells",
"Tumor cells", "Others"))) +
geom_scattermore(pointsize = 1) +
theme(aspect.ratio = 1) +
scale_color_manual(values = RColorBrewer::brewer.pal(10, "Paired")[c(9, 10)]) +
labs(color = "Final")
ggarrange(g1, g2, g3, g4, ncol = 2, nrow = 2, align = "hv")
Subset the cells to melanoma cells based on both scClassify and numbat notation
sce_melanoma <- sce_melanoma_all[, sce_melanoma_all$scClassify_celltype == "Tumor cells" & numbat_classify == "tumor"]
sce_melanoma
## class: SingleCellExperiment
## dim: 30986 114848
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(30986): FAM138A OR4F5 ... AC213203.1 FAM231C
## rowData names(3): ID Symbol Type
## colnames(114848): AAACCCAAGAAACTGT-1 AAACGAAAGGGAGTTC-1 ...
## TTTGTTGGTTGACGGA-20 TTTGTTGTCGGTGTTA-20
## colData names(19): Sample Barcode ... Patient_publish Sample_publish
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):
sce_melanoma_patient <- sce_melanoma[, sce_melanoma$Condition == "DMSO"]
melanoma_markers <- readxl::read_xlsx("data/mmc4-2.xlsx", skip = 1)
melanoma_markers <- split(melanoma_markers$Gene, melanoma_markers$Signature)
mean_trim_low <- function(x, trim = 0) {
mean(x[x >= quantile(x, trim)])
}
gene_scores <- lapply(melanoma_markers, function(x) {
apply(assay(sce_melanoma_patient, "logcounts")[intersect(x, rownames(sce_melanoma_patient)), ], 2,
mean_trim_low, trim = 0.1)
})
IFNg <- readxl::read_xlsx("data/Hallmark_IFNg.xlsx", skip = 1)
IFNg <- unlist(c(IFNg[, 1]))
IFNg <- intersect(IFNg, rownames(sce_melanoma_patient))
IFNg_scores <- apply(logcounts(sce_melanoma_patient)[IFNg, ], 2, mean_trim_low, trim = 0.1)
Nazarian_markers <- read.csv("data/Nazarian_mapk_signature.csv", header = FALSE)
Nazarian_markers <- intersect(Nazarian_markers$V1, rownames(sce_melanoma_patient))
Nazarian_scores <- apply(logcounts(sce_melanoma_patient)[Nazarian_markers, ], 2, mean_trim_low, trim = 0.1)
cc_genes <- lapply(Seurat::cc.genes.updated.2019, function(x) intersect(x, rownames(sce_melanoma_patient)))
cc_s_scores <- apply(logcounts(sce_melanoma_patient)[cc_genes$s.genes, ], 2, mean_trim_low, trim = 0.1)
cc_g2m_scores <- apply(logcounts(sce_melanoma_patient)[cc_genes$g2m.genes, ], 2, mean_trim_low, trim = 0.1)
hallmarkList <- readRDS("data/hallmarkList.rds")
hallmarkList <- lapply(hallmarkList, function(x) intersect(x, rownames(sce_melanoma_patient)))
hallmark_scores <- lapply(hallmarkList, function(x) {
colMeans(logcounts(sce_melanoma_patient)[intersect(x, rownames(sce_melanoma_patient)), ])
})
hvg_fit <- scran::modelGeneVar(sce_melanoma_patient, block = sce_melanoma_patient$Sample)
hvg <- scran::getTopHVGs(hvg_fit, n = 500)
set.seed(2022)
sce_melanoma_patient <- runPCA(sce_melanoma_patient, ncomponents = 20,
subset_row = hvg, BPPARAM = SerialParam(), BSPARAM = RandomParam())
set.seed(2022)
sce_melanoma_patient <- runUMAP(sce_melanoma_patient, dimred = "PCA", min_dist = 0.3, verbose = TRUE)
df_toPlot <- moon::makeMoonDF(sce_melanoma_patient)
nUMI <- colSums(counts(sce_melanoma_patient))
nGene <- colSums(counts(sce_melanoma_patient) != 0)
RPLprop <- colSums(counts(sce_melanoma_patient)[grep("RPL|RPS", rownames(sce_melanoma_patient)),])/colSums(counts(sce_melanoma_patient))
remove_nUMI <- nUMI < 7000 | nGene < 2000
df_toPlot <- moon::makeMoonDF(sce_melanoma_patient)
table(cc_g2m_scores > 1)
##
## FALSE TRUE
## 56341 8034
funMixModel <- function(x, mu1, mu2, sd1, sd2, rho1, rho2) {
dnorm(x, mean = mu1, sd = sd1) * rho1 -
dnorm(x, mean = mu2, sd = sd2) * rho2
}
# Function to get the threshold for correlation based on mixture model
getThreshold <- function(mixmdl, verbose = FALSE){
# if (verbose) {
# plot(mixmdl, which = 2)
# }
membership <- apply(mixmdl$posterior, 1, which.max)
m_list <- sort(unique(membership))
mu_list <- mixmdl$mu
names(mu_list) <- seq_len(length(mu_list))
mu_list <- mu_list[m_list]
if (length(mu_list) > 1) {
idx1 <- as.numeric(names(mu_list)[order(mu_list)][1])
idx2 <- as.numeric(names(mu_list)[order(mu_list)][2])
root <- try(uniroot(funMixModel, interval = c(mixmdl$mu[idx1] -
mixmdl$sigma[idx1],
mixmdl$mu[idx2] +
mixmdl$sigma[idx2]),
mu1 = mixmdl$mu[idx1], mu2 = mixmdl$mu[idx2],
sd1 = mixmdl$sigma[idx1], sd2 = mixmdl$sigma[idx2],
rho1 = mixmdl$lambda[idx1],
rho2 = mixmdl$lambda[idx2]),
silent = TRUE)
if (!"try-error" %in% is(root)) {
t <- root$root
}else{
t <- 0
}
}else{
t <- 0
}
return(t)
}
set.seed(2022)
mixmdl_g2m <- try(mixtools::normalmixEM(cc_g2m_scores,
fast = TRUE,
maxrestarts = 100,
k = 2,
maxit = 1000,
# mu = c(-0.5, rep(0.5, length_unique_y - 2), 1),
# lambda = c(1/2),
# sigma = rep(0.2, length_unique_y),
ECM = TRUE,
verb = TRUE),
silent = TRUE)
## iteration = 1 log-lik diff = 59569.36 log-lik = -19674.84
## iteration = 2 log-lik diff = 21198.26 log-lik = 1523.421
## iteration = 3 log-lik diff = 1418.905 log-lik = 2942.326
## iteration = 4 log-lik diff = 130.5359 log-lik = 3072.861
## iteration = 5 log-lik diff = 18.09592 log-lik = 3090.957
## iteration = 6 log-lik diff = 5.008508 log-lik = 3095.966
## iteration = 7 log-lik diff = 1.895995 log-lik = 3097.862
## iteration = 8 log-lik diff = 0.7641971 log-lik = 3098.626
## iteration = 9 log-lik diff = 0.3112101 log-lik = 3098.937
## iteration = 10 log-lik diff = 0.1269754 log-lik = 3099.064
## iteration = 11 log-lik diff = 0.05183133 log-lik = 3099.116
## iteration = 12 log-lik diff = 0.02116152 log-lik = 3099.137
## iteration = 13 log-lik diff = 0.008640622 log-lik = 3099.146
## iteration = 14 log-lik diff = 0.003528332 log-lik = 3099.149
## iteration = 15 log-lik diff = 0.001440822 log-lik = 3099.151
## iteration = 16 log-lik diff = 0.0005883851 log-lik = 3099.151
## iteration = 17 log-lik diff = 0.0002402811 log-lik = 3099.152
## iteration = 18 log-lik diff = 9.812552e-05 log-lik = 3099.152
## iteration = 19 log-lik diff = 4.007255e-05 log-lik = 3099.152
## iteration = 20 log-lik diff = 1.636492e-05 log-lik = 3099.152
## iteration = 21 log-lik diff = 6.683149e-06 log-lik = 3099.152
## iteration = 22 log-lik diff = 2.729299e-06 log-lik = 3099.152
## iteration = 23 log-lik diff = 1.114597e-06 log-lik = 3099.152
## iteration = 24 log-lik diff = 4.551912e-07 log-lik = 3099.152
## iteration = 25 log-lik diff = 1.858862e-07 log-lik = 3099.152
## iteration = 26 log-lik diff = 7.591734e-08 log-lik = 3099.152
## iteration = 27 log-lik diff = 3.100286e-08 log-lik = 3099.152
## iteration = 28 log-lik diff = 1.266017e-08 log-lik = 3099.152
## iteration = 29 log-lik diff = 5.16593e-09 log-lik = 3099.152
## number of iterations= 29
mixmdl_g2m_threshold <- getThreshold(mixmdl_g2m)
set.seed(2022)
mixmdl_s <- try(mixtools::normalmixEM(cc_s_scores,
fast = TRUE,
maxrestarts = 100,
k = 2,
maxit = 1000,
# mu = c(-0.5, rep(0.5, length_unique_y - 2), 1),
# lambda = c(1/2),
# sigma = rep(0.2, length_unique_y),
ECM = TRUE,
verb = TRUE),
silent = TRUE)
## iteration = 1 log-lik diff = 53419.31 log-lik = -13111.65
## iteration = 2 log-lik diff = 20984.7 log-lik = 7873.05
## iteration = 3 log-lik diff = 1981.623 log-lik = 9854.673
## iteration = 4 log-lik diff = 394.7357 log-lik = 10249.41
## iteration = 5 log-lik diff = 75.20056 log-lik = 10324.61
## iteration = 6 log-lik diff = 18.8214 log-lik = 10343.43
## iteration = 7 log-lik diff = 7.180167 log-lik = 10350.61
## iteration = 8 log-lik diff = 3.433998 log-lik = 10354.04
## iteration = 9 log-lik diff = 1.760211 log-lik = 10355.8
## iteration = 10 log-lik diff = 0.9182721 log-lik = 10356.72
## iteration = 11 log-lik diff = 0.4812294 log-lik = 10357.2
## iteration = 12 log-lik diff = 0.2525403 log-lik = 10357.46
## iteration = 13 log-lik diff = 0.1326015 log-lik = 10357.59
## iteration = 14 log-lik diff = 0.06964569 log-lik = 10357.66
## iteration = 15 log-lik diff = 0.03658657 log-lik = 10357.7
## iteration = 16 log-lik diff = 0.01922231 log-lik = 10357.71
## iteration = 17 log-lik diff = 0.0101002 log-lik = 10357.73
## iteration = 18 log-lik diff = 0.005307422 log-lik = 10357.73
## iteration = 19 log-lik diff = 0.002789063 log-lik = 10357.73
## iteration = 20 log-lik diff = 0.001465711 log-lik = 10357.73
## iteration = 21 log-lik diff = 0.0007702814 log-lik = 10357.74
## iteration = 22 log-lik diff = 0.0004048169 log-lik = 10357.74
## iteration = 23 log-lik diff = 0.000212752 log-lik = 10357.74
## iteration = 24 log-lik diff = 0.0001118132 log-lik = 10357.74
## iteration = 25 log-lik diff = 5.876455e-05 log-lik = 10357.74
## iteration = 26 log-lik diff = 3.088445e-05 log-lik = 10357.74
## iteration = 27 log-lik diff = 1.623177e-05 log-lik = 10357.74
## iteration = 28 log-lik diff = 8.530886e-06 log-lik = 10357.74
## iteration = 29 log-lik diff = 4.483532e-06 log-lik = 10357.74
## iteration = 30 log-lik diff = 2.356421e-06 log-lik = 10357.74
## iteration = 31 log-lik diff = 1.238455e-06 log-lik = 10357.74
## iteration = 32 log-lik diff = 6.508781e-07 log-lik = 10357.74
## iteration = 33 log-lik diff = 3.42101e-07 log-lik = 10357.74
## iteration = 34 log-lik diff = 1.797889e-07 log-lik = 10357.74
## iteration = 35 log-lik diff = 9.450014e-08 log-lik = 10357.74
## iteration = 36 log-lik diff = 4.965113e-08 log-lik = 10357.74
## iteration = 37 log-lik diff = 2.610614e-08 log-lik = 10357.74
## iteration = 38 log-lik diff = 1.372246e-08 log-lik = 10357.74
## iteration = 39 log-lik diff = 7.203198e-09 log-lik = 10357.74
## number of iterations= 39
mixmdl_s_threshold <- getThreshold(mixmdl_s)
set.seed(2022)
cc_scores <- cc_s_scores + cc_g2m_scores
mixmdl_cc <- try(mixtools::normalmixEM(cc_scores,
fast = TRUE,
maxrestarts = 100,
k = 2,
maxit = 1000,
# mu = c(-0.5, rep(0.5, length_unique_y - 2), 1),
# lambda = c(1/2),
# sigma = rep(0.2, length_unique_y),
ECM = TRUE,
verb = TRUE),
silent = TRUE)
## iteration = 1 log-lik diff = 54107.81 log-lik = -56252.45
## iteration = 2 log-lik diff = 22788.71 log-lik = -33463.73
## iteration = 3 log-lik diff = 1935.718 log-lik = -31528.01
## iteration = 4 log-lik diff = 238.6751 log-lik = -31289.34
## iteration = 5 log-lik diff = 25.48529 log-lik = -31263.85
## iteration = 6 log-lik diff = 3.953839 log-lik = -31259.9
## iteration = 7 log-lik diff = 1.354638 log-lik = -31258.55
## iteration = 8 log-lik diff = 0.674628 log-lik = -31257.87
## iteration = 9 log-lik diff = 0.3616927 log-lik = -31257.51
## iteration = 10 log-lik diff = 0.1961173 log-lik = -31257.31
## iteration = 11 log-lik diff = 0.1065305 log-lik = -31257.21
## iteration = 12 log-lik diff = 0.05788899 log-lik = -31257.15
## iteration = 13 log-lik diff = 0.03146127 log-lik = -31257.12
## iteration = 14 log-lik diff = 0.01709973 log-lik = -31257.1
## iteration = 15 log-lik diff = 0.009294462 log-lik = -31257.09
## iteration = 16 log-lik diff = 0.005052139 log-lik = -31257.09
## iteration = 17 log-lik diff = 0.002746237 log-lik = -31257.08
## iteration = 18 log-lik diff = 0.001492826 log-lik = -31257.08
## iteration = 19 log-lik diff = 0.0008114962 log-lik = -31257.08
## iteration = 20 log-lik diff = 0.0004411319 log-lik = -31257.08
## iteration = 21 log-lik diff = 0.0002398025 log-lik = -31257.08
## iteration = 22 log-lik diff = 0.0001303592 log-lik = -31257.08
## iteration = 23 log-lik diff = 7.086492e-05 log-lik = -31257.08
## iteration = 24 log-lik diff = 3.852319e-05 log-lik = -31257.08
## iteration = 25 log-lik diff = 2.094182e-05 log-lik = -31257.08
## iteration = 26 log-lik diff = 1.138433e-05 log-lik = -31257.08
## iteration = 27 log-lik diff = 6.188708e-06 log-lik = -31257.08
## iteration = 28 log-lik diff = 3.364294e-06 log-lik = -31257.08
## iteration = 29 log-lik diff = 1.828896e-06 log-lik = -31257.08
## iteration = 30 log-lik diff = 9.942196e-07 log-lik = -31257.08
## iteration = 31 log-lik diff = 5.404763e-07 log-lik = -31257.08
## iteration = 32 log-lik diff = 2.938104e-07 log-lik = -31257.08
## iteration = 33 log-lik diff = 1.597182e-07 log-lik = -31257.08
## iteration = 34 log-lik diff = 8.683128e-08 log-lik = -31257.08
## iteration = 35 log-lik diff = 4.720641e-08 log-lik = -31257.08
## iteration = 36 log-lik diff = 2.565866e-08 log-lik = -31257.08
## iteration = 37 log-lik diff = 1.394801e-08 log-lik = -31257.08
## iteration = 38 log-lik diff = 7.585186e-09 log-lik = -31257.08
## number of iterations= 38
mixmdl_cc_threshold <- getThreshold(mixmdl_cc)
mixmdl_g2m_threshold
## [1] 0.5013625
mixmdl_s_threshold
## [1] 0.4217393
cell_cycle_cells <- cc_g2m_scores > mixmdl_g2m_threshold | cc_s_scores > mixmdl_s_threshold
table(cell_cycle_cells)
## cell_cycle_cells
## FALSE TRUE
## 47587 16788
inflammatroy_cells <- hallmark_scores$HALLMARK_INFLAMMATORY_RESPONSE > 0.4
table(cell_cycle_cells)
## cell_cycle_cells
## FALSE TRUE
## 47587 16788
table(inflammatroy_cells)
## inflammatroy_cells
## FALSE TRUE
## 62646 1729
table(remove_nUMI)
## remove_nUMI
## FALSE TRUE
## 50353 14022
Remove the cells that are in cell cycle, inflamatory and with low UMI
pure_cells <- !cell_cycle_cells & !inflammatroy_cells & !remove_nUMI
table(pure_cells)
## pure_cells
## FALSE TRUE
## 30807 33568
ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2, color = pure_cells)) +
geom_scattermore(pointsize = 1) +
theme(aspect.ratio = 1) +
scale_color_viridis_d()
cc_prop <- table(cell_cycle_cells, sce_melanoma_patient$Sample)
round(apply(cc_prop, 2, function(x) x/sum(x)), 2)
##
## cell_cycle_cells SCC16_0069_B230216_DMSO SCC16_0500_B101017_DMSO
## FALSE 0.87 0.3
## TRUE 0.13 0.7
##
## cell_cycle_cells SCC20_0352_B291020_DMSO SMU09_0157_B110419_DMSO
## FALSE 0.97 0.92
## TRUE 0.03 0.08
##
## cell_cycle_cells SMU17_0075_B070921_DMSO SMU17_0132_B180418_DMSO
## FALSE 0.75 0.72
## TRUE 0.25 0.28
##
## cell_cycle_cells SMU17_0209_B130617_DMSO SMU18_0017_B110518_DMSO
## FALSE 0.6 0.79
## TRUE 0.4 0.21
##
## cell_cycle_cells SMU18_0283_B090119_DMSO SMU19_0306_B290620_DMSO
## FALSE 0.87 0.74
## TRUE 0.13 0.26
sce_melanoma_patient_pure <- sce_melanoma_patient[, pure_cells]
sce_melanoma_patient_pure
## class: SingleCellExperiment
## dim: 30986 33568
## metadata(1): Samples
## assays(2): counts logcounts
## rownames(30986): FAM138A OR4F5 ... AC213203.1 FAM231C
## rowData names(3): ID Symbol Type
## colnames(33568): AAACGAAAGGGAGTTC-1 AAACGCTGTTTCGTAG-1 ...
## TTTGTTGGTGGCTTGC-19 TTTGTTGTCGGTTCAA-19
## colData names(19): Sample Barcode ... Patient_publish Sample_publish
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):
library(inline)
openblas.set.num.threads <- cfunction( signature(ipt="integer"),
body = 'openblas_set_num_threads(*ipt);',
otherdefs = c ('extern void openblas_set_num_threads(int);'),
libargs = c ('/usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3'),
language = "C",
convention = ".C"
)
openblas.set.num.threads(1)
## $ipt
## [1] 1
library(HDF5Array)
library(rhdf5)
library(scMerge)
data(segList)
batch_label <- sce_melanoma_patient_pure$Patient
ctl_genes <- intersect(rownames(sce_melanoma_patient_pure), segList$human$human_scSEG)
ncores <- 10
hvg <- intersect(unlist(melanoma_markers), rownames(sce_melanoma_patient_pure))
hvg <- setdiff(hvg, c("GDF15", "LGALS3"))
start <- Sys.time()
scMerge_res <- scMerge2(exprsMat = logcounts(sce_melanoma_patient_pure),
batch = batch_label,
cellTypes = NULL,
condition = NULL,
use_bpparam = BiocParallel::MulticoreParam(workers = ncores),
use_bsparam = BiocSingular::RandomParam(),
use_bnparam = BiocNeighbors::AnnoyParam(),
ruvK = 20,
ctl = ctl_genes,
exprsMat_counts = NULL,
k_celltype = 10,
chosen.hvg = hvg,
cosineNorm = FALSE,
return_subset = FALSE,
seed = 1)
## [1] "Cluster within batch"
## [1] "Constructing pseudo-bulk"
## Dimension of pseudo-bulk expression: [1] 30986 1678
## [1] "Identifying MNC using pseudo-bulk:"
## [1] "Running RUV"
end <- Sys.time()
scMerge_res$newY[scMerge_res$newY < 0.001] <- 0
#scMerge_res$newY <- as(scMerge_res$newY, "dgCMatrix")
assay(sce_melanoma_patient_pure, "scMerge") <- scMerge_res$newY
saveRDS(scMerge_res, file = file.path(sourcedata_dir, "scMerge_res_melanoma_tumor_pure.rds"))
filter <- grepl("RPL|RPS|^MT-|ERCC|MTMR|MTND", rownames(sce_melanoma_patient_pure))
gene_corMat <- qlcMatrix::cosSparse(t((logcounts(sce_melanoma_patient_pure)[filter, ])),
t((logcounts(sce_melanoma_patient_pure)[!filter, ])))
gene_corMat_max <- apply(gene_corMat, 2, max, na.rm = TRUE)
exclude_genes <- c(rownames(sce_melanoma_patient_pure)[filter],
names(gene_corMat_max)[gene_corMat_max > 0.8])
filter <- rownames(sce_melanoma_patient_pure) %in% exclude_genes
feature_selection <- function(exprsMat, batch, top_n = 2000, BPPARAM = SerialParam()) {
combined.dec <- scran::modelGeneVar(exprsMat, block = batch, BPPARAM = BPPARAM)
chosen.hvgs <- scran::getTopHVGs(combined.dec, n = top_n)
return(chosen.hvgs)
}
chosen.hvg <- feature_selection(logcounts(sce_melanoma_patient_pure),
sce_melanoma_patient_pure$Sample,
top_n = 500)
chosen.hvg <- setdiff(chosen.hvg, exclude_genes)
length(chosen.hvg)
## [1] 483
set.seed(2022)
sce_melanoma_patient_pure <- runPCA(sce_melanoma_patient_pure,
ncomponents = 10,
#subset_row = setdiff(chosen.hvg, exclude_genes),
BPPARAM = SerialParam(),
BSPARAM = RandomParam(),
exprs_values = "scMerge")
set.seed(2022)
sce_melanoma_patient_pure <- runUMAP(sce_melanoma_patient_pure, dimred = "PCA", min_dist = 0.3, verbose = TRUE)
df_toPlot <- moon::makeMoonDF(sce_melanoma_patient_pure)
df_toPlot$Sample_publish <- factor(df_toPlot$Sample_publish, levels = unique(df_toPlot$Sample_publish))
df_toPlot$Patient_publish <- factor(df_toPlot$Patient_publish, levels = unique(df_toPlot$Patient_publish))
g1 <- ggplot(df_toPlot[sample(nrow(df_toPlot)),], aes(x = UMAP1, y = UMAP2, color = Patient_publish)) +
geom_scattermore(pointsize = 2) +
theme(aspect.ratio = 1) +
scale_color_manual(values = patient_color)
g1
ggsave(file.path(figures_dir, "UMAP_melanoma_reference.pdf"), width = 6, height = 5)
g1 + facet_wrap(~Patient_publish, ncol = 5)
ggsave(file.path(figures_dir, "UMAP_melanoma_reference_bySample.pdf"), width = 14, height = 5)
saveRDS(df_toPlot[, c("UMAP1", "UMAP2", "Patient_publish")],
file = file.path(sourcedata_dir,
"UMAP_melanoma_reference_bySample.rds"))
seurat_obj <- Seurat::CreateSeuratObject(counts = counts(sce_melanoma_patient_pure))
seurat_obj@assays$RNA@data <- logcounts(sce_melanoma_patient_pure)
for(i in names(melanoma_markers)) {
seurat_obj <- Seurat::AddModuleScore(seurat_obj, list(intersect(rownames(seurat_obj),
melanoma_markers[[i]])),
name = i)
}
seurat_module_scores <- seurat_obj@meta.data[, -c(1:3)]
colnames(seurat_module_scores) <- names(melanoma_markers)
g <- lapply(names(gene_scores)[c(1, 4, 2, 6)], function(x) {
#df_toPlot$scores <- gene_scores[[x]][colnames(sce_melanoma_patient_pure)]
df_toPlot$scores <- seurat_module_scores[colnames(sce_melanoma_patient_pure), x]
ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2,
color = scale(scores))) +
geom_scattermore(pointsize = 2) +
theme(aspect.ratio = 1, legend.position = "bottom") +
scale_color_gradient2( low = "#2166AC",
mid = "white",
high = "#B2182B") +
labs(color = x)
})
ggarrange(plotlist = g, ncol = 4, nrow = 1, align = "hv")
ggsave(file.path(figures_dir, "UMAP_melanoma_reference_byScore.pdf"), width = 12, height = 5)
saveRDS(cbind(df_toPlot[, c("UMAP1", "UMAP2")],
apply(seurat_module_scores, 2, scale)),
file = file.path(sourcedata_dir,
"UMAP_melanoma_reference_byScore.rds"))
g <- lapply(names(hallmark_scores), function(x) {
df_toPlot$scores <- hallmark_scores[[x]][colnames(sce_melanoma_patient_pure)]
ggplot(df_toPlot, aes(x = UMAP1, y = UMAP2,
color = scores)) +
geom_scattermore(pointsize = 4) +
theme(aspect.ratio = 1, legend.position = "bottom") +
scale_color_viridis_c() +
labs(color = x)
})
ggarrange(plotlist = g, ncol = 3, nrow = 3, align = "hv")
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`